***********************
Taxsim 1964-1967 Using
CPS Utilities Data
Program 2 of 4 

4/29/10:  EDITED TO RUN ON JIM'S COMPUTER.
		  ONE MAJOR CHANGE: GOT RID OF FAMID2 BECAUSE THE VARIABLE FAMID 
		  ALREADY ASSIGNS A DIFFERENT ID TO THE PRIMARY FAMILY AND THE RELATED 
		  SUBFAMILY.  NOTE: WANT TO ALLOW THEM TO BE SEPARATE TAX FILING UNITS.
***********************;
/*
filename out_1964 'N:\Private\Poverty Trends\SAS\cps\output\taxsim1964_state0.txt';
filename out_1965 'N:\Private\Poverty Trends\SAS\cps\output\taxsim1965_state0.txt';
filename out_1966 'N:\Private\Poverty Trends\SAS\cps\output\taxsim1966_state0.txt';
filename out_1967 'N:\Private\Poverty Trends\SAS\cps\output\taxsim1967_state0.txt';
*/
%macro read(surv_yr);
data taxsim&surv_yr;
    set cpsdata.cps&surv_yr;

    /* Number of children under 14 attributed to this record.  Related
       subfamilies are allowed to be separate from primary families, and when
       a related subfamily exists all children under 14 are assumed to belong
       to it.  Records with famtyp=1 and famdesc=1 therefore get zero; every
       other record gets familydifference, floored at zero (a missing
       familydifference also yields zero).
       An earlier variant keyed the zero case on
       "famtyp=1 AND relsub_head_id NE ." instead of famdesc=1. */
    num_under14 = ifn(famtyp = 1 and famdesc = 1,
                      0,
                      max(0, familydifference));
run;

/* Order the person records by family id; the DATA step that follows uses
   BY famid (FIRST.famid) and needs this ordering. */
proc sort data=taxsim&surv_yr;
    by famid;
run;



data taxsim&surv_yr;
    set taxsim&surv_yr;
    by famid;

    /* Person number within the family (1, 2, ...); used to build the id
       variable. */
    retain counter 0;
    if first.famid then counter = 1;
    else counter = counter + 1;

    /* Income ceiling used by the dependency test below. */
    if 1960 le &surv_yr le 1970 then inc_test = 600;

    /* dep = 1 when any of the following holds, otherwise 0:
         - famrel in (2,3), wkswrk in (0,1), marstat in (0,4,5,7) and
           ptotinc below inc_test;
         - famrel in (2,3), wkswrk in (0,1) and age 19 through 23;
         - famrel = 2 and age 18 or less.
       A missing age satisfies "age le ..." because missing sorts low. */
    dep = (    (     famrel in (2, 3)
                 and wkswrk in (0, 1)
                 and (    (marstat in (0, 4, 5, 7) and ptotinc lt inc_test)
                       or (19 le age le 23) ) )
            or (famrel = 2 and age le 18) );

    /* Child indicators; the family-level step sums kids3 into kid_fcc and
       kids2 into kid_eitc. */
    kids3 = (famrel = 2 and age le 16);
    kids2 = (     famrel = 2
              and (    age le 18
                    or (19 le age le 23 and rsnnotw in (1, 3)) ) );

    /* JH additions: children by age cutoff, and age of the famrel=1 person
       (0 for everyone else), summed per family into sage later. */
    dep18 = (famrel = 2 and age le 17);
    dep17 = (famrel = 2 and age le 16);
    dep13 = (famrel = 2 and age le 12);
    spage = ifn(famrel = 1, age, 0);

    /* Taxpayers over 65, excluding dependents.
       NOTE(review): this uses age GT 65 while the unearned-income split
       later in the program tests age GE 65 - confirm which is intended. */
    gt65 = (age gt 65 and dep = 0);

    /* Earnings of the famrel=1 person (wife); 0 on all other records. */
    wife_inc = ifn(famrel = 1, ptotearn, 0);
run;

/* Re-sort by family id ahead of the BY-group PROC MEANS.  The preceding DATA
   step read the data BY famid and writes every record, so the data should
   already be in this order. */
proc sort data=taxsim&surv_yr;
    by famid;
run;

/* Family-level sums written to temp1 (one row per famid).  Only husbands,
   wives (famrel 0 or 1) and records flagged dep=1 contribute. */
proc means data=taxsim&surv_yr noprint;
    where famrel in (0, 1) or dep = 1;
    by famid;
    var dep kids2 kids3 wife_inc gt65 incuer ptotearn dep18 dep17 dep13 spage;
    output out=temp1
        sum(dep)      = totdep
        sum(kids2)    = kid_eitc
        sum(kids3)    = kid_fcc
        sum(wife_inc) = tfu_wifi
        sum(gt65)     = tfu_gt65
        sum(incuer)   = tfu_unearn
        sum(ptotearn) = tfu_earn
        sum(dep18)    = totdep18
        sum(dep17)    = totdep17
        sum(dep13)    = totdep13
        sum(spage)    = sage;
run;

data cpsdata.taxsim&surv_yr;
    merge taxsim&surv_yr (in=in_person)
          temp1 (keep=famid totdep kid_eitc kid_fcc tfu_wifi tfu_gt65 tfu_unearn
                      tfu_earn totdep18 totdep17 totdep13 sage);
    by famid;

    /* Restrict the sample to heads of tax filing units: the record must come
       from the person file, must not be a spouse (famrel=1) and must not be
       a dependent (dep=0 required). */
    if in_person and famrel ne 1 and dep = 0;

    /* Families with no row in temp1 have missing totals; treat them as 0. */
    array unit_tot{*} totdep kid_eitc kid_fcc tfu_wifi tfu_gt65 tfu_unearn tfu_earn
                      totdep18 totdep17 totdep13 sage;
    do _k = 1 to dim(unit_tot);
        if unit_tot{_k} = . then unit_tot{_k} = 0;
    end;

    /* Adjustments for taxsim. */
    tfu_wifi = max(tfu_wifi, 0);

    /* Children under 14 are not in the CPS person records, so add them to
       the dependent counts; every count is capped at 15.
       NOTE(review): totdep18/totdep17/totdep13 are only capped and do not
       add num_under14, unlike the other three counts - confirm intended. */
    totdep   = min(totdep   + num_under14, 15);
    kid_eitc = min(kid_eitc + num_under14, 15);
    kid_fcc  = min(kid_fcc  + num_under14, 15);
    totdep18 = min(totdep18, 15);
    totdep17 = min(totdep17, 15);
    totdep13 = min(totdep13, 15);

    /* Independent children (records with famrel ne 0) are their own tax
       filing unit: no spousal income, dependents, or spouse age (JH). */
    if famrel ne 0 then do;
        tfu_wifi = 0;
        totdep   = 0;
        kid_fcc  = 0;
        kid_eitc = 0;
        totdep18 = 0;
        totdep17 = 0;
        totdep13 = 0;
        sage     = 0;
    end;

    /* Primary earnings: unit earnings less wife income for heads, own
       earnings otherwise; never negative. */
    if famrel = 0 then taxinc = tfu_earn - tfu_wifi;
    else taxinc = ptotearn;
    taxinc = max(taxinc, 0);

    /* Unearned-income base and 65+ flag used by every split below.  Heads
       (famrel=0) use the unit totals tfu_unearn / tfu_gt65; all other
       records use their own incuer / age.  tfu_gt65 is a non-missing count
       at this point, so "not >= 1" is the same as "= 0".  ue_amt and ue_65
       are work variables and are not kept. */
    if famrel = 0 then do;
        ue_amt = tfu_unearn;
        ue_65  = (tfu_gt65 ge 1);
    end;
    else do;
        ue_amt = incuer;
        ue_65  = (age ge 65);
    end;

    /* Using 1966 survey data (dunern is missing for all obs in 64 and 65),
       the shares of unearned income for social security, interest and
       dividends respectively are
           65 and over: 67%, 26%, 7%       under 65: 8%, 84%, 8%.
       For 1964 and 1965 interest and dividends are combined, so
           65 and over: social security 67%, dividends 33%
           under 65   : social security  8%, dividends 92%.
       The same split is used for 1966/1967 records with missing dunern.
       Positive amounts go to dividends (inc9); negative amounts are assigned
       to interest (inc10); social security (inc12) is never negative. */
    if &surv_yr in (1964 1965) or (&surv_yr in (1966 1967) and dunern = .) then do;
        div_sh = ifn(ue_65, 0.33, 0.92);
        ss_sh  = ifn(ue_65, 0.67, 0.08);
        inc9   = max(0, div_sh * ue_amt);
        inc10  = min(0, div_sh * ue_amt);
        inc12  = max(0, ss_sh * ue_amt);
    end;

    /* 1966/1967: split by the reported source combination (dunern).
       Rent income is set to zero for this exercise, which is why, for
       example, dunern=02 is treated the same as dunern=10 (social security
       alone vs. social security with rent): all unearned income is classed
       as social security.  Rent is zeroed because in 1971 dividend,
       interest and rent income cannot be distinguished; interest is zeroed
       for the same reason, as in the Taxsim macro for 1968-1972.  The shares
       come from the 1971 CPS Utilities data and can be reproduced with the
       SAS program "Taxsim Shares for 1966/1967 CPS from 1971 CPS".
       The code lists are mutually exclusive; a dunern value in none of them
       leaves inc9, inc10 and inc12 untouched. */
    if &surv_yr in (1966 1967) then do;
        select (dunern);
            /* No unearned income, or none from social security, interest
               or dividends. */
            when (1, 9, 17, 25, 99) do;
                inc9  = 0;
                inc12 = 0;
                inc10 = 0;
            end;
            /* All unearned income is social security (never negative). */
            when (2, 10) do;
                inc12 = max(0, ue_amt);
                inc9  = 0;
                inc10 = 0;
            end;
            /* All unearned income is dividends: positive part to dividends,
               negative part to interest. */
            when (3, 5, 7, 11, 13, 15) do;
                inc9  = max(0, ue_amt);
                inc10 = min(0, ue_amt);
                inc12 = 0;
            end;
            /* Social security and dividends. */
            when (4, 6, 8, 12, 14, 16) do;
                div_sh = ifn(ue_65, 0.33, 0.30);
                ss_sh  = ifn(ue_65, 0.67, 0.70);
                inc9   = max(0, div_sh * ue_amt);
                inc10  = min(0, div_sh * ue_amt);
                inc12  = max(0, ss_sh * ue_amt);
            end;
            /* Social security and public assistance. */
            when (18, 26) do;
                inc9   = 0;
                inc10  = 0;
                ss_sh  = ifn(ue_65, 0.60, 0.58);
                inc12  = max(0, ss_sh * ue_amt);
            end;
            /* Dividends and public assistance. */
            when (19, 21, 23, 27, 29, 31) do;
                div_sh = ifn(ue_65, 0.39, 0.23);
                inc9   = max(0, div_sh * ue_amt);
                inc10  = min(0, div_sh * ue_amt);
                inc12  = 0;
            end;
            /* Social security, dividends and public assistance. */
            when (20, 22, 24, 28, 30, 32) do;
                div_sh = ifn(ue_65, 0.06, 0.01);
                ss_sh  = ifn(ue_65, 0.61, 0.81);
                inc9   = max(0, div_sh * ue_amt);
                inc10  = min(0, div_sh * ue_amt);
                inc12  = max(0, ss_sh * ue_amt);
            end;
            otherwise;
        end;
    end;

    /* Retirement income is set to zero. */
    inc11 = 0;
    /* UI was not collected in 64-67, nor is it taxed. */
    inc18 = 0;

    /* For 1964 and 1967, social security includes veterans payments and
       government pensions, which are not included in 1965, 1966 or
       1968-1972.  For 1965, 1966 and 1968-1972 veterans payments and
       government pensions are in the "incomp" variable from CPS Utilities.
       See the CPS Utilities description of dunern for 1964-1967 and of
       incss and incomp for 1968-1972.

       Beginning in 1969 (through 1972) there are yes/no indicators for what
       constitutes unearned income.  Beginning in 1968 there are five
       measures of unearned income: social security (incss), UI (incomp),
       dividend (incint), welfare and public assistance (welfinc) and all
       other assistance (incoth); names are from CPS Utilities.  Each may
       have several components: "incomp" for 1968-1972 has four (dividend
       income, interest income, veterans payments, government pension).
       Future work could use the yes/no identifiers to find cases where
       "incomp" is only dividend or only interest income.  For now all
       "incomp" unearned income is given to dividend income and interest is
       set to zero (they are taxed the same). */

    /* Number aged over 65: unit total for heads and for relhd=1 records
       with famtyp ne 4, otherwise the person's own flag. */
    if famrel = 0 or (relhd = 1 and famtyp ne 4) then num_gt65 = tfu_gt65;
    else num_gt65 = gt65;

    id = famid*100 + counter;

    /* Filing status (edited by JH): 2 when marstat is 1, 2 or 3 or a spouse
       age was found for the unit, otherwise 1. */
    filestat = ifn(marstat in (1, 2, 3) or sage > 0, 2, 1);

    /* Recode the CPS state code to ts_state.  Codes not listed leave
       ts_state missing.
       NOTE(review): state=27 maps to 10, the same value as state=49 -
       confirm this is intended. */
    select (state);
        when (53) ts_state = 1;
        when (85) ts_state = 2;
        when (76) ts_state = 3;
        when (65) ts_state = 4;
        when (89) ts_state = 5;
        when (74) ts_state = 6;
        when (6)  ts_state = 7;
        when (41) ts_state = 8;
        when (43) ts_state = 9;
        when (49) ts_state = 10;
        when (48) ts_state = 11;
        when (86) ts_state = 12;
        when (72) ts_state = 13;
        when (25) ts_state = 14;
        when (23) ts_state = 15;
        when (32) ts_state = 16;
        when (37) ts_state = 17;
        when (51) ts_state = 18;
        when (66) ts_state = 19;
        when (1)  ts_state = 20;
        when (42) ts_state = 21;
        when (2)  ts_state = 22;
        when (26) ts_state = 23;
        when (31) ts_state = 24;
        when (54) ts_state = 25;
        when (33) ts_state = 26;
        when (71) ts_state = 27;
        when (36) ts_state = 28;
        when (78) ts_state = 29;
        when (3)  ts_state = 30;
        when (11) ts_state = 31;
        when (75) ts_state = 32;
        when (10) ts_state = 33;
        when (47) ts_state = 34;
        when (34) ts_state = 35;
        when (24) ts_state = 36;
        when (67) ts_state = 37;
        when (88) ts_state = 38;
        when (13) ts_state = 39;
        when (5)  ts_state = 40;
        when (46) ts_state = 41;
        when (35) ts_state = 42;
        when (52) ts_state = 43;
        when (68) ts_state = 44;
        when (77) ts_state = 45;
        when (4)  ts_state = 46;
        when (44) ts_state = 47;
        when (87) ts_state = 48;
        when (45) ts_state = 49;
        when (22) ts_state = 50;
        when (73) ts_state = 51;
        when (27) ts_state = 10;
        otherwise;
    end;

    /* Constant written wherever a TAXSIM input field is not supplied. */
    fill = 0;

    keep id ref_year ts_state state filestat totdep num_gt65 taxinc tfu_wifi
         inc9 inc10 inc11 inc12 fill inc18 kid_fcc kid_eitc totdep18 totdep17 totdep13 age sage;
run;

/* Summary statistics for the data set just created, as a quick check. */
proc means data=cpsdata.taxsim&surv_yr;
run;
/*
DATA _NULL_;
   SET cpsdata.taxsim&surv_yr;
   FILE out_&surv_yr lrecl=500;
   PUT id ref_year fill filestat totdep num_gt65 taxinc tfu_wifi
   inc9 inc10 inc11 inc12 fill fill fill fill fill inc18 kid_fcc fill fill fill;
RUN;
*/
%mend read;

/* Build cpsdata.taxsim<year> for each of the four survey years. */
%read(1964)
%read(1965)
%read(1966)
%read(1967)

****************************************************************
 FTP MAINDAT TO TAXSIM (http://users.nber.org/~taxsim/taxsim27/)
 NOT SURE IF THIS WILL WORK 
****************************************************************;
**************  Use new version of taxsim since 2016 (JH) ***********;
/* Remote file name: characters 1-2 and 4-5 of &SYSTIME (the hour and minute
   digits), echoed to the log. */
%let DATAFILE = %substr(&SYSTIME,1,2)%substr(&SYSTIME,4,2);
%put 'Filename on taxsimftp server is:' &DATAFILE;

/* FTP filerefs in the tmp directory of the TAXSIM server:
     txpydata - the uploaded input file
     results  - the .txm27 results file
     errors   - the .msg message file */
filename txpydata ftp "&DATAFILE"
         host='taxsimftp.nber.org' cd='tmp'
         user='taxsim' pass='02138' passive;
filename results ftp "&DATAFILE..txm27"
         host='taxsimftp.nber.org' cd='tmp'
         user='taxsim' pass='02138' passive lrecl=1024;
filename errors ftp "&DATAFILE..msg"
         host='taxsimftp.nber.org' cd='tmp'
         user='taxsim' pass='02138' passive;


*****  Different variables have to be submitted to TAXSIM since 2016 (JH) *******;
*****                      No State Tax data before 1977                  *******;
%macro create_dat(yr);
    /*
     * Send the tax-unit file for one survey year to TAXSIM over FTP, read
     * the results back, and save them as taxsim.taxs&yr sorted by id.
     *
     * Parameter:
     *   yr - survey year; selects cpsdata.taxsim&yr as the input data set.
     *
     * Uses the filerefs txpydata, results and errors, and the librefs
     * cpsdata and taxsim; all must be assigned before the macro is called.
     */

    /* Upload one space-delimited record (27 fields) per tax unit.  The
       variable fill is written in the third (state) position and for every
       item that is not supplied.  _NULL_ is used because this step only
       writes the external file; a bare DATA statement would also store a
       copy of the input in an automatically named WORK.DATAn data set. */
    data _null_;
        set cpsdata.taxsim&yr;
        file txpydata;
        put id ref_year fill filestat age sage totdep totdep13 totdep17 totdep18 taxinc tfu_wifi
            inc9 fill fill fill inc10 fill inc11 inc12 inc18 fill fill fill fill fill fill;
    run;

    /* Old version of the PUT statement, kept for reference:
       put id ref_year state filestat totdep num_gt65 taxinc tfu_wifi
           inc9 inc10 inc11 inc12 fill fill fill fill fill inc18 kid_fcc fill fill fill;
    */

    /* Read results directly from the taxsimftp server.  firstobs=2 skips
       the first line of the results file; only the first six fields of each
       line are read. */
    data ts_all;
        infile results firstobs=2;
        input id year ts_state ftl stl ts_fica;
    run;

    proc means data=ts_all;
    run;

    /* Store the results permanently, ordered by id. */
    data taxsim.taxs&yr;
        set ts_all;
    run;

    proc sort data=taxsim.taxs&yr;
        by id;
    run;

    /* Read the taxsim error message file and print it to the log.  _NULL_
       again: nothing needs to be stored. */
    data _null_;
        infile errors;
        input;
        put 'TAXSIM:' _infile_;
    run;

%mend create_dat;

/* Submit each survey year to TAXSIM and store the returned results. */
%create_dat(1964)
%create_dat(1965)
%create_dat(1966)
%create_dat(1967)




